"""
specifying constants and some general configurations
"""
import datetime
from transformers import BertTokenizer, BertForSequenceClassification

# GENERAL CONSTANTS
# Sentiment class names as plain strings.
LABEL_VALUES = [
    "positive",
    "neutral",
    "negative",
]

# Number of samples per batch; 16 or 32 is the recommended size.
BATCH_SIZE = 32

# Maximum sequence length. The longest sentence is noted as 153,
# so 180 leaves some headroom.
MAX_LEN = 180

def get_model_filename(model_version="08-04-2021_23-42"):
    """
    Build the filename of the fine-tuned model checkpoint.

    The version string distinguishes multiple saved models.

    Args:
        model_version: Version tag embedded in the filename. Defaults to
            the pinned version "08-04-2021_23-42", so calls without
            arguments keep returning the same filename as before.
            Pass None to use the current local date and time
            (format "%d-%m-%Y_%H-%M") as the version.

    Returns:
        The filename as a string: "fine-tuned-model_<model_version>.pt".
    """
    if model_version is None:
        # Stamp the name with the current time, minute resolution.
        model_version = datetime.datetime.now().strftime("%d-%m-%Y_%H-%M")
    return f"fine-tuned-model_{model_version}.pt"


# BERT PREPROCESSING
def get_tokenizer():
    """
    Return the BERT tokenizer for the 'bert-base-uncased' checkpoint.

    BERT-Base, Uncased: 12-layer, 768-hidden, 12-heads, 110M parameters.
    The tokenizer is requested with do_lower_case=True, matching the
    uncased vocabulary. A status message is printed before loading.
    """
    print("Downloading BERT-Tokenizer...")
    tokenizer = BertTokenizer.from_pretrained(
        'bert-base-uncased',
        do_lower_case=True,
    )
    return tokenizer


def load_bert():
    """
    Return a pretrained BertForSequenceClassification model.

    This is the pretrained BERT model with a single linear classification
    layer on top, loaded from the 12-layer uncased checkpoint
    ('bert-base-uncased', the same one the tokenizer uses). A status
    message is printed before loading.
    """
    print("Loading BERT Model for sequence classification")
    return BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",
        # Three output labels: positive, neutral, negative.
        num_labels=3,
        # Ask the model to also return attention weights.
        output_attentions=True,
        # Ask the model to also return all hidden states.
        output_hidden_states=True,
    )
